{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "06e5af38-85ad-46b6-a5ea-52a7ff947853",
   "metadata": {},
   "source": [
    "# Labelme转COCO-批量\n",
    "\n",
    "同济子豪兄 呕心沥血写成\n",
    "\n",
    "兼容目标检测、图像分割、关键点检测三种标注\n",
    "\n",
    "2023-3-9\n",
    "\n",
    "2023-3-10\n",
    "\n",
    "2023-4-15\n",
    "\n",
    "2023-4-16\n",
    "\n",
    "2023-4-21"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0a8a2a1-fb46-4d51-8b4f-cd0568bc13f8",
   "metadata": {},
   "source": [
    "## 导入工具包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5969d148-19e6-4b2e-9797-01105b87b56c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75c4fd53-f3ce-4a27-9024-6abec83717d9",
   "metadata": {},
   "source": [
    "## 删除系统自动生成的多余文件"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7ecd3ad-874c-46cc-ac47-6f4ae3adf07a",
   "metadata": {},
   "source": [
    "### 查看待删除的多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a8356247-4b89-42e9-aa87-4c630f05e1f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '__MACOSX'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a1b64d9d-bdf8-43fe-8321-bb6d241137dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.DS_Store'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a5f038c2-72ba-4724-a190-b2ec97d8bcf0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./【C】Labelme转COCO-单个文件/.ipynb_checkpoints\n",
      "./Ruler_15_Dataset/labelme_jsons/val_labelme_jsons/.ipynb_checkpoints\n",
      "./.ipynb_checkpoints\n"
     ]
    }
   ],
   "source": [
    "!find . -iname '.ipynb_checkpoints'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "191ccc29-baeb-46ff-8125-71c49adee533",
   "metadata": {},
   "source": [
    "### 删除多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4bf04f95-0750-4248-8696-fa2aa4ff03e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "217b2325-835b-451c-89d7-3d39c5dfc5c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cfb7afa4-106a-4e6a-ab35-86f0eb902432",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a2904f2-93ba-4726-9b2b-b8285511a760",
   "metadata": {},
   "source": [
    "### 验证多余文件已删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "33239471-a70e-426d-beba-3517c14311e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '__MACOSX'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f82a7703-8b94-4103-9672-ec29849616b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.DS_Store'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0101460a-d0bc-4388-aa0b-d30b0f0ef538",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.ipynb_checkpoints'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "092bec89-6d83-4800-9cc9-3619896b2572",
   "metadata": {},
   "source": [
    "## 指定数据集信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "30a7e9be-197e-4a5a-af0b-c54226edf48d",
   "metadata": {},
   "outputs": [],
   "source": [
    "Dataset_root = 'Ruler_15_Dataset'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "048d587f-a9eb-4666-9eb4-66c1e78edb95",
   "metadata": {},
   "outputs": [],
   "source": [
    "class_list= [\n",
    "    {'id': 0, 'name': 'Triangle_30'},\n",
    "    {'id': 1, 'name': 'Triangle_45'},\n",
    "    {'id': 2, 'name': 'Angle'},\n",
    "    {'id': 3, 'name': 'Ruler'}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3a2eaf47-e8ce-40e0-9bba-3e783797b6c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "label2id = {}\n",
    "for each in class_list:\n",
    "    label2id[each['name']] = each['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "bfcf9bce-153d-481b-9534-f872533d733c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Triangle_30': 0, 'Triangle_45': 1, 'Angle': 2, 'Ruler': 3}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label2id"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "70d08e3a-1c6e-4f2c-a2fd-32fd874debdb",
   "metadata": {},
   "source": [
    "## 函数-处理单个labelme标注json文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "a53dbf74-7e72-4ef0-b39e-e03baa198a21",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_single_json(labelme, image_id=1):\n",
    "    '''\n",
    "    输入labelme的json数据，输出coco格式的每个框的关键点标注信息\n",
    "    '''\n",
    "    \n",
    "    global ANN_ID\n",
    "    \n",
    "    coco_annotations = []\n",
    "    \n",
    "    for each_ann in labelme['shapes']: # 遍历该json文件中的所有标注\n",
    "\n",
    "        if each_ann['shape_type'] == 'rectangle': # 筛选出框\n",
    "\n",
    "            ## 该框的元数据\n",
    "            bbox_dict = {}\n",
    "            # 该框的类别 信息\n",
    "            bbox_dict['category_id'] = label2id[each_ann['label']]\n",
    "            bbox_dict['segmentation'] = []\n",
    "            \n",
    "            bbox_dict['iscrowd'] = 0\n",
    "            bbox_dict['image_id'] = image_id\n",
    "            bbox_dict['id'] = ANN_ID\n",
    "            ANN_ID += 1\n",
    "\n",
    "            # 获取框坐标\n",
    "            bbox_left_top_x = min(int(each_ann['points'][0][0]), int(each_ann['points'][1][0]))\n",
    "            bbox_left_top_y = min(int(each_ann['points'][0][1]), int(each_ann['points'][1][1]))\n",
    "            bbox_right_bottom_x = max(int(each_ann['points'][0][0]), int(each_ann['points'][1][0]))\n",
    "            bbox_right_bottom_y = max(int(each_ann['points'][0][1]), int(each_ann['points'][1][1]))\n",
    "            bbox_w = bbox_right_bottom_x - bbox_left_top_x\n",
    "            bbox_h = bbox_right_bottom_y - bbox_left_top_y\n",
    "            bbox_dict['bbox'] = [bbox_left_top_x, bbox_left_top_y, bbox_w, bbox_h] # 左上角x、y、框的w、h\n",
    "            bbox_dict['area'] = bbox_w * bbox_h\n",
    "            \n",
    "            # 筛选出分割多段线\n",
    "            for each_ann in labelme['shapes']: # 遍历所有标注\n",
    "                if each_ann['shape_type'] == 'polygon': # 筛选出分割多段线标注\n",
    "                    # 第一个点的坐标\n",
    "                    first_x = each_ann['points'][0][0]\n",
    "                    first_y = each_ann['points'][0][1]\n",
    "                    if (first_x>bbox_left_top_x) & (first_x<bbox_right_bottom_x) & (first_y<bbox_right_bottom_y) & (first_y>bbox_left_top_y): # 筛选出在该个体框中的关键点\n",
    "                        bbox_dict['segmentation'] = list(map(lambda x: list(map(lambda y: round(y, 2), x)), each_ann['points'])) # 坐标保留两位小数\n",
    "                    \n",
    "            coco_annotations.append(bbox_dict)\n",
    "            \n",
    "    return coco_annotations"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fd555105-e8ab-4a63-a2b1-6b2cf97b5949",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 函数-转换当前目录下所有labelme格式的json文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2cabd690-f506-4409-91e2-72b58778be09",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_folder():\n",
    "    IMG_ID = 0\n",
    "    ANN_ID = 0\n",
    "\n",
    "    # 遍历所有 labelme 格式的 json 文件\n",
    "    for labelme_json in os.listdir(): \n",
    "\n",
    "        if labelme_json.split('.')[-1] == 'json':\n",
    "\n",
    "            with open(labelme_json, 'r', encoding='utf-8') as f:\n",
    "\n",
    "                labelme = json.load(f)\n",
    "\n",
    "                ## 提取图像元数据\n",
    "                img_dict = {}\n",
    "                img_dict['file_name'] = labelme['imagePath']\n",
    "                img_dict['height'] = labelme['imageHeight']\n",
    "                img_dict['width'] = labelme['imageWidth']\n",
    "                img_dict['id'] = IMG_ID\n",
    "                coco['images'].append(img_dict)\n",
    "\n",
    "                ## 提取框和关键点信息\n",
    "                coco_annotations = process_single_json(labelme, image_id=IMG_ID)\n",
    "                coco['annotations'] += coco_annotations\n",
    "\n",
    "                IMG_ID += 1\n",
    "\n",
    "                print(labelme_json, '已处理完毕')\n",
    "\n",
    "        else:\n",
    "            pass"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6aabc06-9a26-454f-81af-c534b0a0ae8f",
   "metadata": {},
   "source": [
    "## 转换训练集的所有labelme标注文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d679480d-4d4a-4860-9163-a21d05af8d16",
   "metadata": {},
   "outputs": [],
   "source": [
    "coco = {}\n",
    "\n",
    "coco['categories'] = class_list\n",
    "\n",
    "coco['images'] = []\n",
    "coco['annotations'] = []\n",
    "\n",
    "IMG_ID = 0\n",
    "ANN_ID = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "1d84ea80-0f61-47e1-92a4-073e3788218a",
   "metadata": {},
   "outputs": [],
   "source": [
    "path = os.path.join(Dataset_root, 'labelme_jsons', 'train_labelme_jsons')\n",
    "os.chdir(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "8099ada2-2d02-4aaa-acf2-6dd32bef9968",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.json 已处理完毕\n",
      "10.json 已处理完毕\n",
      "13.json 已处理完毕\n",
      "4.json 已处理完毕\n",
      "14.json 已处理完毕\n",
      "9.json 已处理完毕\n",
      "15.json 已处理完毕\n",
      "5.json 已处理完毕\n",
      "12.json 已处理完毕\n",
      "7.json 已处理完毕\n",
      "11.json 已处理完毕\n",
      "6.json 已处理完毕\n"
     ]
    }
   ],
   "source": [
    "process_folder()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ceff4939-0e59-445b-84de-ecda71077d7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存coco标注文件\n",
    "coco_path = '../../train_coco.json'\n",
    "with open(coco_path, 'w') as f:\n",
    "    json.dump(coco, f, indent=2)\n",
    "    \n",
    "os.chdir('../../')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c9be517-917d-4e03-a5cb-c73646b3a622",
   "metadata": {},
   "source": [
    "## 验证训练集的MS COCO格式标注"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5b0c450f-995b-4832-93d8-2a7ff90ac7d5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['images', 'labelme_jsons', 'train_coco.json']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.listdir()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "10fa6971-4520-41fd-b6ca-5560bd2dad03",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading annotations into memory...\n",
      "Done (t=0.00s)\n",
      "creating index...\n",
      "index created!\n"
     ]
    }
   ],
   "source": [
    "from pycocotools.coco import COCO\n",
    "\n",
    "my_coco = COCO('train_coco.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d91c7c7b-3de8-40e8-ab49-127d5d824e77",
   "metadata": {},
   "source": [
    "## 转换测试集的所有labelme标注文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "96914155-b40a-4a12-b070-0ca6e888139d",
   "metadata": {},
   "outputs": [],
   "source": [
    "coco = {}\n",
    "\n",
    "coco['categories'] = class_list\n",
    "\n",
    "coco['images'] = []\n",
    "coco['annotations'] = []\n",
    "\n",
    "IMG_ID = 0\n",
    "ANN_ID = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b4f414b5-5ee2-42d7-b19c-cac0d4d452f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "path = os.path.join('labelme_jsons', 'val_labelme_jsons')\n",
    "os.chdir(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2a55e0f3-f0ca-4095-8ba6-310f0f42ae59",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.json 已处理完毕\n",
      "1.json 已处理完毕\n",
      "3.json 已处理完毕\n"
     ]
    }
   ],
   "source": [
    "process_folder()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "4d0f345f-ef7a-400e-ad69-657163d98d65",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存coco标注文件\n",
    "coco_path = '../../val_coco.json'\n",
    "with open(coco_path, 'w') as f:\n",
    "    json.dump(coco, f, indent=2)\n",
    "    \n",
    "os.chdir('../../')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51da0a5c-936f-4747-9d08-ecacfbab5942",
   "metadata": {},
   "source": [
    "## 验证测试集的MS COCO格式标注"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "49833294-7839-4126-ae49-86761b0e1d90",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading annotations into memory...\n",
      "Done (t=0.00s)\n",
      "creating index...\n",
      "index created!\n"
     ]
    }
   ],
   "source": [
    "from pycocotools.coco import COCO\n",
    "\n",
    "my_coco = COCO('val_coco.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66311282-8d71-4618-8b8e-f7e2343829d6",
   "metadata": {},
   "source": [
    "## 删除labelme标注目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c27cf5a5-8361-41da-8237-e7d8c2be8b81",
   "metadata": {},
   "outputs": [],
   "source": [
    "!rm -rf labelme_jsons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "32c73f79-4665-45a2-973e-3ce16635999a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['images', 'train_coco.json', 'val_coco.json']"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "os.listdir()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fea087e7-98fa-4718-92f6-48d818207617",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 删除系统自动生成的多余文件"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31de4343-bd7e-458d-b529-64d78e172bbe",
   "metadata": {},
   "source": [
    "### 查看待删除的多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "47cb1ce5-0b0a-4ac9-94df-03eb3c60896f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '__MACOSX'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "53444a3b-6e48-4eeb-a015-1e34a53cbb3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.DS_Store'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "07170b16-6274-4719-bc51-070a2cc9ab4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./【C】Labelme转COCO-单个文件/.ipynb_checkpoints\n",
      "./Ruler_15_Dataset/.ipynb_checkpoints\n",
      "./.ipynb_checkpoints\n"
     ]
    }
   ],
   "source": [
    "!find . -iname '.ipynb_checkpoints'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f403262f-2404-4630-9d1f-984626bb44af",
   "metadata": {},
   "source": [
    "### 删除多余文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "438ff972-62b3-4b5f-a9f0-e4502e043690",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bd9950e2-10d7-4483-9e9e-fc4ea0c9ac77",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "616bac5e-aba3-459a-9a17-e60fd6ed7149",
   "metadata": {},
   "outputs": [],
   "source": [
    "!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "72fcfcfc-3077-4c06-ad22-8986e02a0a38",
   "metadata": {},
   "source": [
    "### 验证多余文件已删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d652b73f-5cfb-4553-9557-d86f9659fc59",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '__MACOSX'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f610d58a-5e99-4e36-b860-b593b9ef3aed",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.DS_Store'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ca76960c-88fc-484d-b305-861bde35d44a",
   "metadata": {},
   "outputs": [],
   "source": [
    "!find . -iname '.ipynb_checkpoints'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3fbb8917-6ed8-47d5-be73-7fa810385268",
   "metadata": {},
   "source": [
    "## 得到完整的MS COCO格式的数据集"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
